# This script calculates municipalities' exposure to wildfires:
#   1/ overlay fire perimeter with census tract shapefiles
#   2/ aggregate tract-level exposure (weighted by population) to municipality level
# Output: a place-year panel of fire exposure data (place_panel.csv)

rm(list = ls())

library(tidyverse)
library(sf)
library(measurements)

options(scipen = 999)

############## Specify path to data folder ##############
setwd("~")


############## Overlay fire perimeter with census tract shapefiles ##############

#### Inputs - enter parameters here ----
fire_db <- "fire18_1.gdb" # geodatabase or shapefile containing layers
fire_layer <- "firep18_1" # what fire layer do we want? Only applies for a geodatabase - use NA if fire_db is a shapefile.
fire_year_cutoff <- 1975 # what is our cutoff for the year of fire? We will keep fires from after this date.
years_in_study <- c(1980, 1990, 2000, 2010) # enter the years for which we have tract data

#### Census Tracts ----

# Read in CA tracts, add CENSUS_YEAR column, keep only GEOID and CENSUS_YEAR columns #

tracts_CA_2010 <- st_read("nhgis0002_shape/CA_tracts/tracts_CA_2010.shp", crs = 3857) %>% 
  st_transform(3310) %>% 
  mutate(CENSUS_YEAR = 2010,
         tract_area = conv_unit(as.numeric(st_area(.)), from = "m2", to = "acre")) %>% # find area of the tract in acres
  dplyr::select(GEOID = GEOID10,
                CENSUS_YEAR,
                tract_area)

tracts_CA_2000 <- st_read("nhgis0002_shape/CA_tracts/tracts_CA_2000.shp", crs = 3857) %>% 
  st_transform(3310) %>% 
  mutate(CENSUS_YEAR = 2000,
         tract_area = conv_unit(as.numeric(st_area(.)), from = "m2", to = "acre")) %>% 
  dplyr::select(GEOID = GISJOIN2,
                CENSUS_YEAR,
                tract_area)

tracts_CA_1990 <- st_read("nhgis0002_shape/CA_tracts/tracts_CA_1990.shp", crs = 3857) %>% 
  st_transform(3310) %>% 
  mutate(CENSUS_YEAR = 1990,
         tract_area = conv_unit(as.numeric(st_area(.)), from = "m2", to = "acre")) %>% 
  dplyr::select(GEOID = GISJOIN2,
                CENSUS_YEAR,
                tract_area)

tracts_CA_1980 <- st_read("nhgis0002_shape/CA_tracts/tracts_CA_1980.shp", crs = 3857) %>% 
  st_transform(3310) %>% 
  mutate(CENSUS_YEAR = 1980,
         tract_area = conv_unit(as.numeric(st_area(.)), from = "m2", to = "acre")) %>% 
  dplyr::select(GEOID = GISJOIN2,
                CENSUS_YEAR,
                tract_area)

# Flatten CA tracts into one dataframe

tracts_CA_all_yrs <- rbind(tracts_CA_2010, tracts_CA_2000, tracts_CA_1990, tracts_CA_1980) 

#### Wildfire ----

# Functions - custom functions for analysis here ####
fire_round <- function(x,
                       earliest = 1980, # earliest available census tracts shapes
                       latest = 2010 # latest available 
){
  y <- case_when(x < fire_year_cutoff ~ fire_year_cutoff, # note that fires more than five years older than the cutoff must be filtered out first.
                 x > latest ~ latest,
                 TRUE ~ round(x, -1))
  return(y)
}

find_tract_fireExposure <- function(tracts, # tracts - needs to be an sf object
                                    fires, # fires - needs to be an sf object
                                    round_year, # which census year are you looking at?
                                    grouped_summary = FALSE # if TRUE, returns a grouped summary by tract instead
){
  
  # a - filter tracts and fires by year
  tracts_year <- tracts %>% filter(CENSUS_YEAR == round_year) %>% st_buffer(dist = 0) # adding a 0 buffer resolves st_intersection error
  fires_year <- fires %>% filter(FIRE_YEAR_ROUND == round_year) %>% st_buffer(dist = 0)
  
  # b - Find every fire that occurred in a tract. If a tract experienced 4 fires, it would have 4 rows
  fire_to_tracts <- st_join(tracts_year, # geometry is tracts
                            fires_year)
  
  # c - for each fire, find its area in the intersecting tract
  intersection <- st_intersection(fires_year, # geometry is intersection of fires and tracts
                                  tracts_year) %>% 
    mutate(FIRE_AREA_IN_TRACT = conv_unit(as.numeric(st_area(.)), from = "m2", to = "acre")) %>% # convert from square meters to acres
    as.data.frame() %>% 
    select(GEOID, KEY, FIRE_NAME, FIRE_NUM, FIRE_YEAR, FIRE_AREA_IN_TRACT) # select only the columns for the subsequent join
  
  # d - join c to b
  tracts_with_fireArea <- fire_to_tracts %>% # geometry is tracts
    left_join(intersection, by = c("KEY", "GEOID", "FIRE_NAME", "FIRE_NUM", "FIRE_YEAR")) ## NOTE -- Can we delete everything but KEY?  
  
  # e - The total land area of all fires in a tract. One row per tract. 
  tracts_with_fireArea_grouped <- tracts_with_fireArea %>% 
    as.data.frame() %>% # make a regular data frame
    select(-geometry) %>%  # remove geometry column
    group_by(GEOID) %>% 
    summarize(TOTAL_FIRE_AREA_IN_TRACT = sum(FIRE_AREA_IN_TRACT, na.rm = TRUE)) # for each tract, sum up the total fire area, across all fires
  
  # f - final output. join e to d
  final <- tracts_with_fireArea %>% 
    left_join(tracts_with_fireArea_grouped, by = "GEOID")
  
  # g - alternative summary output. join grouped summary back to tracts.
  summary <- tracts_year %>% 
    left_join(tracts_with_fireArea_grouped, by = "GEOID") %>% 
    st_as_sf()
  
  if (grouped_summary == FALSE) {
    result <- final
  }
  else {
    result <- summary
  }
  return(result)
}

# Analysis ----
# Clean Data
fire <- st_read(dsn = fire_db, layer = fire_layer, crs = 3310) %>%  # Choose the fire layer we want
  st_cast("MULTIPOLYGON") %>% # re-cast .gdb layer so the intersections with the census tracts work 
  st_transform(crs = st_crs(tracts_CA_all_yrs))  # re-project to web mercator

fire_clean <- fire %>% 
  mutate(KEY = row_number(),
         FIRE_YEAR = as.numeric(as.character(YEAR_))) %>% # convert year column from factor to number
  filter(FIRE_YEAR >= fire_year_cutoff) %>%   # keep only fires after cutoff year
  dplyr::select(KEY, FIRE_NAME, FIRE_NUM, FIRE_YEAR, STATE, AGENCY, UNIT_ID, INC_NUM, ALARM_DATE, CONT_DATE, # keep only some columns
                CAUSE, COMMENTS, REPORT_AC, C_METHOD, OBJECTIVE) %>% 
  mutate(FIRE_YEAR_ROUND = fire_round(FIRE_YEAR), # round year to nearest decade
         FIRE_ENTIRE_AREA = conv_unit(as.numeric(st_area(.)), from = "m2", to = "acre")) # find area of the ENTIRE fire in acres

# Loop over all years in the study
tracts_split <- split(tracts_CA_all_yrs, tracts_CA_all_yrs$CENSUS_YEAR) # turn tracts data frame into a list of lists separated by year

fire_list <- map2(.x = tracts_split, .y = years_in_study, # loop the analysis function over the tract years and years in the study
                  ~ find_tract_fireExposure(
                    tracts = .x,
                    fires = fire_clean,
                    round_year = .y,
                    grouped_summary = FALSE))
fire_df <- do.call(rbind, fire_list) # Final output 

# detailed summary
fire_df <- fire_df %>% as.data.frame() %>% select(-geometry)


############## Aggregate tract-level fire exposure to municipality-level ##############

### Aggregate to tract-year level

# correct the GEOID issue with 1980-2000
TractFire1 <- fire_df %>% filter(CENSUS_YEAR <= 2000)

TractFire1 <- TractFire1 %>%
  mutate(GEOID = paste(GEOID, "00", sep = "")) %>%
  mutate(TRACTID = paste(substr(GEOID, 1, 2),
                         substr(GEOID, 4, 6),
                         substr(GEOID, 8, 13),
                         sep = ""))

TractFire2 <- fire_df %>% filter(CENSUS_YEAR == 2010) %>%
  mutate(TRACTID = GEOID)

TractFire <- rbind(TractFire1, TractFire2)
rm(TractFire1, TractFire2)

# collapse into single census tracts
TractCensus <- TractFire %>% group_by(TRACTID, CENSUS_YEAR) %>% 
  summarize(TRACT_AREA = mean(tract_area, na.rm = TRUE))

# aggregate fire areas over tract-year
if (LargeFire == 1) {
  TractYear <- TractFire %>% 
    filter(!is.na(FIRE_YEAR), FIRE_ENTIRE_AREA >= 300) %>%
    group_by(TRACTID, FIRE_YEAR) %>%
    summarize(CENSUS_YEAR = unique(CENSUS_YEAR)[1],
              FIRE_AREA_IN_TRACT = sum(FIRE_AREA_IN_TRACT, na.rm = TRUE))
} else {
  TractYear <- TractFire %>% 
    filter(!is.na(FIRE_YEAR)) %>%
    group_by(TRACTID, FIRE_YEAR) %>%
    summarize(CENSUS_YEAR = unique(CENSUS_YEAR)[1],
              FIRE_AREA_IN_TRACT = sum(FIRE_AREA_IN_TRACT, na.rm = TRUE))
}

# merge back the tract area
TractYear <- left_join(TractYear, TractCensus, 
                       by = c("TRACTID", "CENSUS_YEAR"))

# calculate percentage of areas exposed to fire
TractYear$PERCENT_FIRE <- TractYear$FIRE_AREA_IN_TRACT / TractYear$TRACT_AREA



### Create a place-tract-year panel using the crosswalks  

# initialize final dataframe
Final <- data.frame() 

year = 1990

# loop over crosswalks
for (year in c(1990, 2000, 2010)) {
  FilePath <- paste0("tract_muni_", year, ".csv")
  
  # read crosswalks
  if (year == 1990) {
    CrsWalk <- read_csv(FilePath, 
                        col_names = c("county", "tract", "place", "pop", "afact"),
                        skip = 1)
  } else {
    CrsWalk <- read_csv(FilePath,
                        col_names = c("county", "tract", "state", "place", "stab", 
                                      "cntyname", "placenm", "pop", "afact"),
                        skip = 2) %>%
      select(-c("state", "stab", "cntyname", "placenm"))
  }
  
  # generate census tract ID
  CrsWalk <- CrsWalk %>% 
    mutate(TRACTID = paste(county, str_remove(tract, "\\."), sep = "")) %>%
    mutate(TRFLAG = substr(TRACTID, 10, 11) %>% as.numeric()) %>%
    filter(TRFLAG != 99) %>%  # overseas population
    mutate(TRACTID = case_when( # correct for the BNAs
      TRFLAG < 90 ~ TRACTID,
      TRFLAG > 90 ~ paste(substr(TRACTID, 1, 9), "0", as.character(99-TRFLAG), sep = "")
    )) %>% select(-TRFLAG)
  
  # filter out parts of tract not in municipalities
  CrsWalk <- CrsWalk %>% filter(place != "99999")
  
  # create a panel
  CrsWalk <- tibble::rowid_to_column(CrsWalk, "rowID")
  
  if (year == 2010) {
    Panel <- with(CrsWalk, 
                  expand.grid(FIRE_YEAR = seq(2006, 2016), 
                              rowID = unique(rowID)))
  } else if (year == 2000) {
    Panel <- with(CrsWalk, 
                  expand.grid(FIRE_YEAR = seq(1995, 2005), 
                              rowID = unique(rowID)))
  } else {
    Panel <- with(CrsWalk, 
                  expand.grid(FIRE_YEAR = seq(1986, 1994), 
                              rowID = unique(rowID)))
  }
  
  # merge back population and place ID
  Panel <- left_join(Panel, CrsWalk, by = "rowID")
  
  # merge fire data
  Panel <- left_join(Panel, TractYear %>% filter(CENSUS_YEAR == year),
                     by = c("TRACTID", "FIRE_YEAR"))
  
  
  Final <- rbind(Final, Panel)
}


### Collapse the panel to place-year level


# delete tract area data
PlaceYear <- Final %>% select(-TRACT_AREA)

# re-calculate the census year
fire_round <- function(x,
                       earliest = 1980, # earliest available census tracts shapes
                       latest = 2010 # latest available 
){
  y <- case_when(x > latest ~ latest,
                 TRUE ~ round(x, -1))
  return(y)
}

PlaceYear$CENSUS_YEAR <- fire_round(PlaceYear$FIRE_YEAR)

# merge census tract area
PlaceYear <- left_join(PlaceYear, TractCensus, by = c("TRACTID", "CENSUS_YEAR"))

# calculate variables for aggregation (allocation factor has been accounted for in pop)
PlaceYear <- PlaceYear %>% mutate(popfire = PERCENT_FIRE*pop, 
                                  poptotal = pop, 
                                  tract_area = TRACT_AREA*afact)

PlaceYear <- PlaceYear %>% group_by(place, FIRE_YEAR) %>%
  summarize(popfire = sum(popfire, na.rm = TRUE),
            poptotal = sum(poptotal, na.rm = TRUE),
            area = sum(tract_area, na.rm = TRUE))

PlaceYear <- PlaceYear %>% mutate(percent_fire = popfire/poptotal)

PlaceYear$place[PlaceYear$place == "02112"] <- "02132"
PlaceYear$place[PlaceYear$place == "49187"] <- "49194"

write_csv(PlaceYear, "place_panel.csv")
